"%&%" = function(a,b) paste(a,b,sep="")
library(ggplot2)
library(dplyr)
library(tidyr)
#my.dir <- '/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/BSLMM_exp/'
my.dir <- '~/GitHub/GenArch/GenArchPaper/BSLMM/'
source('~/GitHub/GenArch/GenArchPaper/multiplot.R')
fig.dir <- '~/GitHub/GenArch/GenArchPaper/Figures/'
mil<-read.table(my.dir %&% 'DGN-WB_exp_BSLMM_1M_iterations_genes_finished_in_100hrs_2015-06-16.txt',header=T)
hun<-read.table(my.dir %&% 'DGN-WB_exp_BSLMM-s100K_iterations_all_genes_2015-06-14.txt',header=T)
hun<-hun[complete.cases(hun),]
all<-inner_join(mil,hun,by='gene')
dim(all)
## [1] 1421 37
##plot diff in median PVE
ggplot(all,aes(x=pve50.x,y=pve50.y)) + geom_point() + xlab("median PVE (1M iterations)") + ylab("median PVE (100K iterations)") + geom_abline(c(0,1))
ggplot(all,aes(x=pve50.x,y=pve50.x-pve50.y)) + geom_point() + xlab("median PVE (1M iterations)") + ylab("med PVE (1M) - med PVE (100K)")
##plot diff in upper 95% credible interval PVE
ggplot(all,aes(x=pve975.x,y=pve975.y)) + geom_point() + xlab("upper CI PVE (1M iterations)") + ylab("upper CI PVE (100K iterations)") + geom_abline(c(0,1))
ggplot(all,aes(x=pve975.x,y=pve975.x-pve975.y)) + geom_point() + xlab("upper CI PVE (1M iterations)") + ylab("upper CI PVE (1M) - upper CI PVE (100K)")
##plot diff in lower 95% credible interval PVE
ggplot(all,aes(x=pve025.x,y=pve025.y)) + geom_point() + xlab("lower CI PVE (1M iterations)") + ylab("lower CI PVE (100K iterations)") + geom_abline(c(0,1))
ggplot(all,aes(x=pve025.x,y=pve025.x-pve025.y)) + geom_point() + xlab("lower CI PVE (1M iterations)") + ylab("lower CI PVE (1M) - lower CI PVE (100K)")
##plot diff in median PGE
ggplot(all,aes(x=pge50.x,y=pge50.y)) + geom_point() + xlab("median PGE (1M iterations)") + ylab("median PGE (100K iterations)") + geom_abline(c(0,1))
ggplot(all,aes(x=pge50.x,y=pge50.x-pge50.y)) + geom_point() + xlab("median PGE (1M iterations)") + ylab("med PGE (1M) - med PGE (100K)")
##plot diff in upper 95% credible interval PGE
ggplot(all,aes(x=pge975.x,y=pge975.y)) + geom_point() + xlab("upper CI PGE (1M iterations)") + ylab("upper CI PGE (100K iterations)") + geom_abline(c(0,1))
ggplot(all,aes(x=pge975.x,y=pge975.x-pge975.y)) + geom_point() + xlab("upper CI PGE (1M iterations)") + ylab("upper CI PGE (1M) - upper CI PGE (100K)")
##plot diff in lower 95% credible interval PGE
ggplot(all,aes(x=pge025.x,y=pge025.y)) + geom_point() + xlab("lower CI PGE (1M iterations)") + ylab("lower CI PGE (100K iterations)") + geom_abline(c(0,1))
ggplot(all,aes(x=pge025.x,y=pge025.x-pge025.y)) + geom_point() + xlab("lower CI PGE (1M iterations)") + ylab("lower CI PGE (1M) - lower CI PGE (100K)")
data <- hun %>% arrange(pve50) %>% mutate(position=1:length(pve50),`LCS>0.01`=pge025>0.01)
head(data,n=3L)
## gene h50 pve50 rho50 pge50 pi50 n_gamma50
## 1 SHCBP1 0.007489189 0.002255288 0.6748546 0.2452269 0.02470316 2
## 2 PEX11B 0.009904461 0.002262330 0.7816407 0.0995318 0.01386788 1
## 3 POLR3GL 0.012444085 0.002421142 0.7465017 0.1491072 0.01726753 1
## h025 pve025 rho025 pge025 pi025 n_gamma025
## 1 0.0002431027 7.795688e-05 0.04658053 0 0.008353717 0
## 2 0.0002672425 6.494790e-05 0.06241255 0 0.007701220 0
## 3 0.0002618914 6.331537e-05 0.04661430 0 0.009215575 0
## h975 pve975 rho975 pge975 pi975 n_gamma975 position
## 1 0.2583769 0.01672958 0.9960595 0.9583445 0.13095160 16 1
## 2 0.4109267 0.01385198 0.9977285 0.9449034 0.03397639 6 2
## 3 0.3093211 0.02252967 0.9968665 0.9399027 0.08992310 8 3
## LCS>0.01
## 1 FALSE
## 2 FALSE
## 3 FALSE
tail(data,n=3L)
## gene h50 pve50 rho50 pge50 pi50
## 13167 TMEM176B 0.7503864 0.8450594 0.9979710 0.9987881 0.00514503
## 13168 HLA-DQA2 0.8727005 0.8454030 0.9981617 0.9976210 0.01233211
## 13169 HLA-DQB1 0.7047662 0.8590246 0.9921770 0.9976744 0.01490577
## n_gamma50 h025 pve025 rho025 pge025 pi025
## 13167 7 0.6236936 0.8263025 0.9864737 0.9940242 0.004153668
## 13168 7 0.7816838 0.8316794 0.9899553 0.9914287 0.011385030
## 13169 10 0.4994558 0.8430376 0.9405188 0.9871328 0.008630525
## n_gamma025 h975 pve975 rho975 pge975 pi975
## 13167 5 0.9226421 0.8542179 0.9998271 0.9999435 0.006703949
## 13168 6 0.9457489 0.8585409 0.9998575 0.9997468 0.013859300
## 13169 8 0.8340207 0.8713745 0.9998262 0.9998754 0.024743210
## n_gamma975 position LCS>0.01
## 13167 9 13167 TRUE
## 13168 9 13168 TRUE
## 13169 15 13169 TRUE
ggplot(data,aes(x=position,y=pve50,ymin=pve025,ymax=pve975,col=`LCS>0.01`)) + geom_pointrange(col='gray')+geom_point()
ggplot(data,aes(x=position,y=pge50,ymin=pge025,ymax=pge975,col=`LCS>0.01`)) + geom_pointrange(col='gray')+geom_point()
ggplot(data,aes(x=position,y=n_gamma50,ymin=n_gamma025,ymax=n_gamma975,col=`LCS>0.01`)) + geom_pointrange(col='gray')+geom_point()
data <- hun %>% arrange(pge50) %>% mutate(position=1:length(pge50),`LCS>0.01`=pge025>0.01,`medianSNPs<=10`=n_gamma50<=10)
head(data,n=3L)
## gene h50 pve50 rho50 pge50 pi50
## 1 COX7C 0.01692984 0.003432102 0.8281494 0.0001469885 0.001273520
## 2 TOP2A 0.01844599 0.003377690 0.8615399 0.0091151460 0.001745283
## 3 CLEC18B 0.02054125 0.006086327 0.7558502 0.0126856400 0.001241263
## n_gamma50 h025 pve025 rho025 pge025 pi025
## 1 1 0.0003328227 0.0001036626 0.07951984 0 0.0009400815
## 2 1 0.0004504753 0.0001032576 0.06883884 0 0.0011945301
## 3 1 0.0006393358 0.0002164674 0.04436956 0 0.0008424312
## n_gamma025 h975 pve975 rho975 pge975 pi975
## 1 0 0.4096393 0.01789460 0.9981367 0.9269078 0.002704068
## 2 0 0.6704550 0.01887628 0.9993515 0.9440432 0.004614959
## 3 0 0.6201597 0.02877017 0.9980631 0.9162264 0.002540946
## n_gamma975 position LCS>0.01 medianSNPs<=10
## 1 4 1 FALSE TRUE
## 2 4 2 FALSE TRUE
## 3 4 3 FALSE TRUE
tail(data,n=3L)
## gene h50 pve50 rho50 pge50 pi50
## 13167 C22orf32 0.7375050 0.7554564 0.9988335 0.9989030 0.002784994
## 13168 FBLN5 0.5201401 0.7271755 0.9959768 0.9989287 0.001934650
## 13169 ERAP1 0.7306787 0.8422865 0.9975138 0.9990240 0.002924236
## n_gamma50 h025 pve025 rho025 pge025 pi025
## 13167 1 0.4262714 0.7344221 0.9827528 0.9923274 0.0016715230
## 13168 3 0.2449352 0.7027841 0.9504776 0.9899655 0.0009012359
## 13169 5 0.5228048 0.8280668 0.9763151 0.9938152 0.0020799960
## n_gamma025 h975 pve975 rho975 pge975 pi975
## 13167 1 0.9110360 0.7683610 0.9999084 0.9998308 0.004217367
## 13168 3 0.7132473 0.7510260 0.9997500 0.9998127 0.003870395
## 13169 5 0.8333501 0.8557812 0.9996419 0.9998840 0.003394083
## n_gamma975 position LCS>0.01 medianSNPs<=10
## 13167 3 13167 TRUE TRUE
## 13168 5 13168 TRUE TRUE
## 13169 7 13169 TRUE TRUE
ggplot(data,aes(x=position,y=pve50,ymin=pve025,ymax=pve975,col=`LCS>0.01`)) + geom_pointrange(col='gray')+geom_point()
ggplot(data,aes(x=position,y=pge50,ymin=pge025,ymax=pge975,col=`LCS>0.01`)) + geom_pointrange(col='gray')+geom_point()
ggplot(data,aes(x=position,y=n_gamma50,ymin=n_gamma025,ymax=n_gamma975,col=`LCS>0.01`)) + geom_pointrange(col='gray')+geom_point()
ggplot(data,aes(x=pve50,y=pge50,ymin=pge025,ymax=pge975,col=`LCS>0.01`)) + geom_pointrange(col='gray') + geom_point() + theme_bw() #theme_classic() for no grid
cor.test(hun$pge50,hun$pve50)
##
## Pearson's product-moment correlation
##
## data: hun$pge50 and hun$pve50
## t = 98.406, df = 13167, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6410351 0.6607207
## sample estimates:
## cor
## 0.6509873
ggplot(data,aes(x=pve50,y=log10(n_gamma50),col=`LCS>0.01`)) + geom_point(alpha=0.3)
b<-ggplot(data,aes(x=pve50,y=pge50,ymin=pge025,ymax=pge975,col=`LCS>0.01`)) + geom_pointrange(col='gray') + geom_point() + theme_bw() + xlab("median PVE") + ylab("median PGE") + theme(legend.position = c(1,0),legend.justification = c(1,0))
c<-ggplot(data,aes(x=pve50,y=n_gamma50,ymin=n_gamma025,ymax=n_gamma975,col=`medianSNPs<=10`)) + geom_pointrange(col="gray") + geom_point(alpha=0.4) + theme_bw() + xlab("median PVE") + ylab("median number of SNPs") + theme(legend.position = c(1,1),legend.justification = c(1,1))
#gcta <- read.table('/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.all.Chr1-22_globalOtherChr.2015-03-18.txt',header=TRUE)
gcta <- read.table('~/Dropbox/cross-tissue/expArch_DGN-FHS_results/DGN-WB.h2.all.models_FHSfdr0.05.all.Chr1-22_globalOtherChr.2015-03-18.txt',header=TRUE)
bslmm <- read.table(my.dir %&% 'DGN-WB_exp_BSLMM-s100K_iterations_all_genes_2015-06-14.txt',header=T)
all <- inner_join(gcta,bslmm,by='gene')
## Warning in inner_join_impl(x, y, by$x, by$y): joining factors with
## different levels, coercing to character vector
dim(all)
## [1] 12682 34
a<-ggplot(all,aes(x=local.h2,y=pve50))+geom_point(alpha=0.4)+coord_cartesian(xlim=c(0,1),ylim=c(0,1))+xlab(expression("GCTA h"^2))+ylab('BSLMM PVE')+geom_abline(c(0,1),color='red')+theme_bw()
cor.test(all$local.h2,all$pve50)
##
## Pearson's product-moment correlation
##
## data: all$local.h2 and all$pve50
## t = 374.64, df = 12680, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9562086 0.9590936
## sample estimates:
## cor
## 0.9576752
tiff(filename=fig.dir %&% "Fig-DGN-BSLMM.tiff",width=360,height=1080)
multiplot(a+ggtitle('A\n')+theme(plot.title=element_text(hjust=0),text=element_text(size=18)),b + ggtitle('B\n')+ theme(plot.title=element_text(hjust=0),text=element_text(size=18)),c + ggtitle('C\n')+ theme(plot.title=element_text(hjust=0),text=element_text(size=18)),cols=1)
## Loading required package: grid
dev.off()
## quartz_off_screen
## 2
png(filename=fig.dir %&% "Fig-DGN-BSLMM.png",width=360,height=1080)
multiplot(a+ggtitle('A\n')+theme(plot.title=element_text(hjust=0),text=element_text(size=18)),b + ggtitle('B\n')+ theme(plot.title=element_text(hjust=0),text=element_text(size=18)),c + ggtitle('C\n')+ theme(plot.title=element_text(hjust=0),text=element_text(size=18)),cols=1)
dev.off()
## quartz_off_screen
## 2
subdata <- select(data,pve50,pge50,`medianSNPs<=10`)
table(subdata[,3])/sum(table(subdata[,3]))
##
## FALSE TRUE
## 0.1942441 0.8057559
summary(subdata$pge50)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000147 0.472400 0.791400 0.692900 0.932500 0.999000
subdata <- select(data,pve50,pge50,`medianSNPs<=10`) %>% filter(pve50>0.10)
table(subdata[,3])/sum(table(subdata[,3]))
##
## FALSE TRUE
## 0.03731053 0.96268947
summary(subdata$pge50)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1325 0.9038 0.9488 0.9257 0.9739 0.9990
subdata <- select(data,pve50,pge50,`medianSNPs<=10`) %>% filter(pve50>0.50)
table(subdata[,3])/sum(table(subdata[,3]))
##
## FALSE TRUE
## 0.03907638 0.96092362
summary(subdata$pge50)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.8265 0.9808 0.9892 0.9844 0.9934 0.9990